#!/usr/bin/python
"""
%InsertOptionParserUsage%


@author: 	Arthur Kantor
@contact: 	akantorREMOVE_THIS@uiuc.edu
@copyright:	Arthur Kantor 2008
@license: 	GPL version 3
@date: 		11/18/2008
@version: 	0.9

"""

import sys, re, os;
import logging
import time
from optparse import OptionParser
from gmtkParam import *
from util import *
from util.UniqueList import *
import copy



def makeParser():
	'''Build and return the OptionParser for this script.

	The parser is also created at import time (see bottom of file) so that
	its help text can be substituted into the module docstring.
	@return: a configured optparse.OptionParser
	'''
	usage = """
genTriUnitParams.py [--help] [options] 
Takes a trainable parameters file, with one gaussian per mixture, and generates a tri-unit (e.g. tri-phone) trainable parameters file,
tied across left and right contexts.

Also generates a named collection and certain kinds of feature value files

The name format for input gaussian mixtures is:
	gm:featureKind:unit:subUnitState e.g.
	gm:plp:AA:0

The output will be of form 
	gm:plp:AE:AA:AE:0
"""
	parser = OptionParser()
	parser.usage=usage
	
	parser.add_option("-i", "--inTrainedParamFile", type="string", metavar="FILE", 
					  help="<REQUIRED>.  a trainable parameters file, with one gaussian per mixture")
	
	# fixed typo: "mixtured" -> "mixtures"
	parser.add_option("-o", "--outTrainedParamFile", type="string", metavar="FILE", 
					  help="<REQUIRED>.  The output trainable parameters file, into which the tied triphone mixtures will be written.")
	
	parser.add_option("-n", "--nameCollectionFile", type="string", metavar="FILE",	help="If specified, store the collection of GM names in FILE")
	
	parser.add_option("-r", "--orderNameCollectionFile", type="string", metavar="FILE",
					  help="If specified, the tri-units are generated by iterating through the units in the order listed in orderNameCollectionFile.  The names should be in the gm:featureKind:unit:subUnitState described above. If unspecified the units are iterated alphabetically.")

	parser.add_option( "--nameCollectionName", type="string", metavar="STR",
					help="The name of the collection.  Default: the base name of the --nameCollectionFile.")

	parser.add_option("-f", "--featureValuesFile", type="string", metavar="FILE",	
					help="If specified, generate feature values and store them in FILE")
	
	# fixed copy-paste error: the default comes from --featureValuesFile, not --featureValuesName
	# (see parseOpts, and the parallel wording of the other *Name options)
	parser.add_option("--featureValuesName", type="string", metavar="STR",
					help="The name of the feature values set.  Default: the base name of the --featureValuesFile.")
	
	parser.add_option("-d", "--featureDefinitionsFile", type="string", metavar="FILE",	
					help="If specified, generate feature definitions and store them in FILE.")
	
	parser.add_option("--featureDefinitionsName", type="string", metavar="STR",
					help="The name of the feature Definitions set.  Default: the base name of the --featureDefinitionsFile.")
					
	parser.add_option("-q", "--questionsFile", type="string", metavar="FILE",	
					help="If specified, generate distinctive features questions about phoneme context and store them in FILE.")
	
	parser.add_option("--questionsName", type="string", metavar="STR",
					help="The name of the questions set.  Default: the base name of the --questionsFile.")
	

	parser.add_option("-k", "--featureValuesKind", type="choice", metavar="STR", choices=['triUnit','phoneticContext'], default='triUnit',
					help="How to generate featureValues. Can be 'triUnit' or 'phoneticContext'. Default: %default")
	
	parser.add_option('-l', '--leftContextDT', type="string", metavar="FILE",	
					help="write DT mapping units to left context to FILE. requires --featureValuesKind phoneticContext")

	parser.add_option("-c", "--cardFile", type="string", metavar="FILE", 
					help="If specified, stores the number of entries in the named collection into this file.")
	
	parser.add_option("-u", "--untieCmdFile", type="string", metavar="FILE", 
					help="If specified, generates the untie gmtkTie command file.")

	parser.add_option("-t", "--clusterCmdFile", type="string", metavar="FILE", 
					help="If specified, generates the cluster gmtkTie command file.")

	parser.add_option("-v", "--verbosity", type="int", default = 51-logging.INFO,
		help="Prints debug info to STDOUT ranges from 1 (critical) to 50 (everything) default: %default")
	return parser

def parseOpts():	
	'''Parse command-line options with the module-level parser.

	Verifies the required input/output file options, then fills in each
	unset *Name option with the basename (extension stripped) of the
	corresponding *File option.
	@return: the parsed options object
	'''
	opts, args = parser.parse_args()
	if not (opts.inTrainedParamFile and opts.outTrainedParamFile):
		parser.error("need all of the following defined: inTrainedParamFile outTrainedParamFile")

	#each <x>Name defaults to basename of <x>File, minus the last extension
	derivedNames = [('nameCollectionName', 'nameCollectionFile'),
					('featureValuesName', 'featureValuesFile'),
					('featureDefinitionsName', 'featureDefinitionsFile'),
					('questionsName', 'questionsFile')]
	for nameAttr, fileAttr in derivedNames:
		fileVal = getattr(opts, fileAttr)
		if fileVal and not getattr(opts, nameAttr):
			setattr(opts, nameAttr, os.path.basename(fileVal).rsplit('.',1)[0])

	return opts
	
def main(argv):
	'''Top-level driver.

	Reads the single-gaussian trainable-params file, streams out the tied
	tri-unit params file, and writes whichever auxiliary files were requested
	on the command line (named collection, feature values/definitions,
	questions, cardinality, untie/cluster gmtkTie command files, and the
	left-context decision tree).
	@param argv: full argument list (argv[0] included); used only to log the
	command line.  Option values come from sys.argv via parseOpts().
	'''
	cmd=' '.join(argv)
	opts = parseOpts()

	#set up logging
	logging.basicConfig(stream=sys.stdout,format='%(levelname)s %(message)s',level=51-opts.verbosity)
	logging.info('Program started on %s as %s', time.ctime(), cmd)
	
	#comment line written at the top of every generated file
	comment= '%'+" generated with command: %s\n\n"%cmd
	
	ws=Workspace()
	ws.readTrainableParamsFile(opts.inTrainedParamFile)
	logging.info('read readTrainableParamsFile %s', opts.inTrainedParamFile)
	gm=ws[MG]	#the mixture-of-gaussians section of the workspace

	orderColl=None
	if opts.orderNameCollectionFile:
		ws.readFromFile(NameCollection,opts.orderNameCollectionFile)
		logging.info('read NameCollection %s', opts.orderNameCollectionFile)
		orderColl=ws[NameCollection].values()[0] #there is only one value

	#feature values are always streamed; discarded via /dev/null when no file was requested
	if opts.featureValuesFile:
		fvo=openIOForWrite(opts.featureValuesFile, comment)
	else:
		fvo=openIOForWrite('/dev/null', comment)	
	fvout = FeatureValuesOutStream(fvo, opts.featureValuesName, opts.featureDefinitionsName)

	if opts.nameCollectionFile:
		nco=openIOForWrite(opts.nameCollectionFile, comment)
	else:
		nco=openIOForWrite('/dev/null', comment)
	#NOTE(review): presumably the collection-file preamble (collection count and index) — confirm against the gmtk format
	nco.writelnWord('1\n\n0')	
	nco.writelnWord(opts.nameCollectionName)
	ncout=UnnumberedOutStream(nco)

	#we normally would just say ws.writeTrainableParamsFile(opts.outTrainedParamFile)
	#but since we have too many MG's to fit in memory, we write out the file in pieces
	#first just copy everything except the MGs
	to = WorkspaceIO(file(opts.outTrainedParamFile,'w'))	#NOTE: file() is a Python-2-only builtin
	for typ in [DPMF, SPMF, MEAN, COVAR,DLINK_MAT,WEIGHT_MAT, DCPT,GC]:
		ws.writeToIO(typ,to)
	tpout=NumberedObjectOutStream(to, MG)

	#the main thing
	if opts.featureValuesKind == 'triUnit':
		fg=TriunitFeatureGenerator(gm,orderColl)
	elif opts.featureValuesKind == 'phoneticContext':
		fg=PhoneticContextFeatureGenerator(gm,orderColl)
	fg.writeTriUnits(tpout, ncout, fvout)
	
	tpout.finalize()
	fvout.finalize()
	ncout.finalize()

	#write out the rest of trainable parameters
	for typ in [GSMG, LSMG, MSMG, ]:
		ws.writeToIO(typ,to)
	to.close()
	logging.info('closed  trainable params file %s ', opts.outTrainedParamFile)
	
	fvo.close()
	logging.info('closed  feature values file %s ', fvo.name)
	
	nco.close()
	logging.info('closed  name collection file %s ', nco.name)
	
	#print (newNameColl, newGMs[MG].values(), fv)
	
	if opts.featureDefinitionsFile:
		io=openIOForWrite(opts.featureDefinitionsFile, comment)
		fd=fg.genFeatureDefs(opts.featureDefinitionsName,)
		fd.writeToFile(io)
		io.close()
		logging.info('closed feature defs file %s ', opts.featureDefinitionsFile)
		

	if opts.questionsFile:
		io=openIOForWrite(opts.questionsFile, comment)
		#getQuestions reads fg._fd, which is set by genFeatureDefs above,
		#so --questionsFile effectively requires --featureDefinitionsFile
		q=fg.getQuestions(opts.questionsName, opts.featureValuesName)
		q.writeToFile(io)
		io.close()
		logging.info('closed questions file %s ', opts.questionsFile)

	if opts.cardFile:
		f=open(opts.cardFile,'w')
		f.write("STATE_CARD %d\n"%fvout.objCount)
		if opts.featureValuesKind == 'phoneticContext':
			f.write('CONTEXT_CARD %d\n'%fg.contextCard())
		f.close()
		logging.info('closed card file %s ', f.name)
		
	if opts.untieCmdFile:
		f=openIOForWrite(opts.untieCmdFile, comment)
		fg.writeUntieCmds(f,opts.nameCollectionName)
		f.close()	
		logging.info('closed untie commands file %s ', f.name)

	if opts.clusterCmdFile:
		f=openIOForWrite(opts.clusterCmdFile, comment)
		fg.writeTieCmds(f,opts.nameCollectionName, opts.featureDefinitionsName, opts.featureValuesName, opts.questionsName)
		f.close()	
		logging.info('closed tie commands file %s ', f.name)

	if opts.leftContextDT:
		if not opts.featureValuesKind == 'phoneticContext':
			logging.warning('--leftContextDT option requires --featureValuesKind phoneticContext. Not generating context DTs in file %s',opts.leftContextDT)
		else:
			logging.info('generating left contexts')
			dtWs=fg.genLeftContextDT()
			logging.info('writing left context DT to %s', opts.leftContextDT)
			dto=WorkspaceIO(file(opts.leftContextDT,'w'))
			dtWs.writeToIO(DT,dto)
			dto.close()


def openIOForWrite(fileName,comment):
	'''Open fileName for writing, wrap it in a WorkspaceIO, and write the
	given comment as its first content.
	@param fileName: path of the file to create/overwrite
	@param comment: string written first (callers pass a '%...' header line)
	@return: the WorkspaceIO; the caller is responsible for closing it
	'''
	#use open() instead of the deprecated, Python-2-only file() constructor;
	#open() behaves identically here and is forward-compatible
	wsIO=WorkspaceIO(open(fileName,'w'))
	wsIO.writelnWord(comment)	
	return wsIO

def cartesian_product(L,*lists):
    """Yield every tuple in the cartesian product of the argument sequences.

    Equivalent to itertools.product(L, *lists); kept as a named wrapper so
    existing callers are unchanged.  The stdlib implementation replaces the
    previous hand-rolled recursive generator.
    @param L: first sequence
    @param lists: any further sequences
    """
    from itertools import product  # local import: module header does not import itertools
    for combo in product(L, *lists):
        yield combo
		
class TriunitFeatureGenerator(object):
	'''Generates context-tied tri-unit (e.g. triphone) gaussian mixtures from
	single-gaussian mixtures named gm:featureKind:unit:subUnitState, plus the
	associated named collection, feature values, feature definitions,
	questions, and gmtkTie untie/tie command files.
	'''
	def __init__(self, gm,orderedList=None):
		'''@param gm: mapping from mixture name to mixture object (the MG section of a Workspace)
		@param orderedList: optional list of mixture names fixing the unit iteration
		order; must have the same length as gm.  Default: sorted(gm.keys()).
		@raise ValueError: if orderedList and gm disagree in length
		'''
		self._gm=gm
		self._gmPrefix= gm.keys()[0].rsplit(':',2)[0] #get the gm:plp part
		#print self._gmPrefix

		if not orderedList:
			orderedList=sorted(gm.keys())
		if len(orderedList) != len(gm.keys()):
			raise ValueError("len(orderedList) == %d != %d == len(gm.keys()))"%(len(orderedList) , len(gm.keys())))

		#the unit:subUnitState tail of each mixture name, in iteration order
		self._subUnitNames=[i.split(':',2)[2] for i in orderedList]
		#self._subUnitNames=self._subUnitNames[:140]

		#distinct unit names in first-seen order; these serve as the left/right contexts
		self._contexts=UniqueList([i.split(':')[0] for i in self._subUnitNames])
		logging.info('unit Names: %d, sub-unit names: %d', len(self._contexts), len(self._subUnitNames))

		#scratch workspace that feature definitions and questions are created in
		newWS=Workspace()
		self._newWS = newWS
	
	def contextCard(self):
		'''@return: the number of distinct context units'''
		return len(self._contexts)
	
	def writeTriUnits(self, tpout, ncout,  fvout):
		'''writes the Triunits
			@param tpout: where to write MGs 
			@param ncout:  where to write named collection
			@param fvout:  where to write feature values
		'''
				
		logging.info('generating %d triUnits...', self.contextCard()**2*len(self._subUnitNames))
		i = 0

		#one output mixture per (leftContext, rightContext, subUnit) triple
		for p,n,subUnitName in cartesian_product(self._contexts,self._contexts,self._subUnitNames):
			i +=1
			if i%100000 == 0:
				#progress dots, since this loop can run for a long time
				sys.stdout.write('.')
				sys.stdout.flush()
				
			c,cs=subUnitName.split(':')
			newName = "%s:%s:%s:%s:%s"%(self._gmPrefix,p,c,n,cs)
			#write the raw name directly, bypassing the stream's numbered-object formatting
			ncout._io.writelnWord(newName)
			ncout.objCount += 1
			
			#temporarily rename the shared center-unit mixture so it is written
			#under the tri-unit name, then restore the original name
			oldname=self._gmPrefix+':'+subUnitName
			curGm=self._gm[oldname]
			curGm.name=newName
			tpout.writeObject(curGm)
			curGm.name=oldname

			fvout.writeObject(FeatureValues(newName, (p,subUnitName,n)))
		logging.info('finished generating %d triUnits...', self.contextCard()**2*len(self._subUnitNames))
	
	def genFeatureDefs(self,featureDefName):
		'''Build (and cache on self._fd) the three feature definitions used for
		clustering: leftContext, centerUnit, rightContext.
		@param featureDefName: name of the resulting FeatureDefinitionList
		@return: the FeatureDefinitionList (also kept in self._fd for getQuestions)
		'''
		self._fd = FeatureDefinitionList(self._newWS, featureDefName)
		self._fd.append(FeatureDefinition('leftContext', self._contexts))
		self._fd.append(FeatureDefinition('centerUnit', self._subUnitNames))
		self._fd.append(FeatureDefinition('rightContext', self._contexts))
		return self._fd	
			
	def getQuestions(self, name, fvname):
		'''Build the question collection over left/right contexts.
		Requires genFeatureDefs() to have been called first (reads self._fd).
		@param name: name of the QuestionCollection
		@param fvname: name of the feature values set the questions refer to
		@return: the QuestionCollection
		'''
		#(questionNamePrefix, feature-definition column, question sets, index of the
		# phone within a multi-phone unit that touches that context side)
		contexts = [('left_is',0,DistictiveQuestionsFeatures(),-1),('right_is',2,DistictiveQuestionsFeatures(),0)]
		qc = QuestionCollection(self._newWS, name, self._fd.name, fvname)
		
		for (prefix,featCol, distinctiveQuestions, relPhoneIdx) in contexts:
			for u in self._contexts:
				if u[0] == '.' and u[-1]== '.': 
					#add units to the question sets if the last/first phoneme of the unit
					#exists in the left/right question set
					phones=u[1:-1].split('_')
					for ps in distinctiveQuestions.values():
						if phones[relPhoneIdx] in ps:
							ps.append(u)
							
			
			qs = [ Question("%s_%s"%(prefix,q), self._fd[featCol].name, v) 
		 			for q,v in distinctiveQuestions.items()]
			qc.update([(q.name, q) for q in qs])
		return qc


	def writeUntieCmds(self,fh, colName):
		'''Write the gmtkTie "untie" command file.
		@param fh: WorkspaceIO to write commands to
		@param colName: named collection referenced by the tie commands
		'''
		cmds=[]	#NOTE(review): unused local, kept as-is
		dq = DistictiveQuestionsFeatures()
		#map unit name -> list of its sub-unit state indices
		units={}
		for (u,subUnitIdx) in [ su.split(':') for su in self._subUnitNames]:
			if u in units:
				units[u].append(int(subUnitIdx))
			else:
				units[u]=[int(subUnitIdx)]

		ostr=NumberedObjectOutStream(fh,'gmtkTie commands')

		#ostr.writeComment('%untie all first.  this seems much faster than tying only what''s needed')
		#ostr.writeComment('%FIXME the above is no longer true, after a bug was fixed in gmtkTie')
		#ostr.writelnString('untie Mixture gm:plp:.* 0')

		ostr.writeComment('\n%Even though all units are fully tied, we tie again using ')
		ostr.writeComment('%the named collection, to reduce the number of physical GMs.')

		ostr.writeComment('\n%tie the filled-pause models')
		ostr.writeComment('%the end of word token is left to be context dependent')
		for u in dq['filled_pauses']: 
			for i in range(3):
				ostr.writelnString('tie Mixture gm:plp:[A-Z._]+:%s:[A-Z._]+:%d 2 Centroid=Arbitrary CollectionName=%s '%(u,i,colName))
		
		ostr.writeComment('\n%now tie the inner states of units longer than a single phoneme')
		ostr.writeComment('\n%In a unit with K states, the inner states are 2,3,...K-3 inclusive.')
		for u in sorted(units):
			if u[0] == '.' and u[-1]== '.': #untie only the boundary sub-states
				for i in units[u]:
					if i>1 and i < max(units[u])-1:
						ostr.writelnString('tie Mixture gm:plp:[A-Z._]+:%s:[A-Z._]+:%d 2 Centroid=Arbitrary CollectionName=%s '%(u,i,colName))


		ostr.writeComment('\n%Now untie the boundary states for all units besides filled pauses ')
		for u in sorted(units):
			if u in dq['filled_pauses']: #don't untied filled pauses
				continue
			
			if u[0] == '.' and u[-1]== '.': #untie only the boundary sub-states
				boundaryUnits = [i for i in units[u] if i<=1 or i>=max(units[u])-1]
			else:
				boundaryUnits = units[u]

			for i in boundaryUnits:
				ostr.writelnString('untie Mixture gm:plp:[A-Z._]+:%s:[A-Z._]+:%d 0'%(u,i))
					
		ostr.finalize()

	def writeTieCmds(self,fh, colName, featureDefsName, featureValuesName, questionsName):
		'''Write the gmtkTie decision-tree clustering command file.
		@param fh: WorkspaceIO to write to
		@param colName: named collection referenced by the DT commands
		@param featureDefsName: name of the feature definitions set
		@param featureValuesName: name of the feature values set
		@param questionsName: name of the question set
		'''
		#first we write the CPP defines:
		defines = '''
#define DT_COMMAND(PHONE,STATE) \\
DTcluster Mixture gm:plp:[A-Z._]+:PHONE:[A-Z._]+:STATE \\
11 \\
CollectionName=%s \\
FeatureSetName=%s \\
FeatureValuesName=%s \\
QuestionSetName=%s \\
TreeName=triphone_tree_ ## PHONE:STATE \\
MinClusterMembers=1 \\
MinOccupancyCount=MIN_OCCUPANCY_COUNT \\
ThresholdOccupancyCount=1 \\
MinImprovementPercent=MIN_IMPROVEMENT_PERCENT \\
CollectionName=%s \\
CentroidOccupancyWeighting=true

#define SAVE_TREE(PHONE,STATE) \\
saveTree \\
2 \\
Filename=MODEL_DIR/tie/clusterTrees/triphone_tree_ ## PHONE:STATE \\
TreeName=triphone_tree_ ## PHONE:STATE

''' %(colName, featureDefsName, featureValuesName, questionsName, colName)
		fh.writelnWord(defines)
				
		ostr=NumberedObjectOutStream(fh,'gmtkTie commands')
		ostr.writelnString('loadFeatureDefinitionSet 1 Filename=MODEL_DIR/tie/featureDefs.txt')
		ostr.writelnString('loadFeatureValueSet 1 Filename=MODEL_DIR/tie/featureValues.txt')
		ostr.writelnString('loadQuestionSet 1 Filename=MODEL_DIR/tie/questions.txt')


		ostr.writeComment('%cluster each substate of each simple phone, and boundary substates of each .COMPLEX_PHONE.')
		ostr.writeComment('%substates of non-speech simple phones and internal states of complex phones should not have been untied .')

		cmds=[]	#NOTE(review): unused local, kept as-is
		#map unit name -> list of its sub-unit state indices
		units={}
		for (u,subUnitIdx) in [ su.split(':') for su in self._subUnitNames]:
			if u in units:
				units[u].append(int(subUnitIdx))
			else:
				units[u]=[int(subUnitIdx)]

		dq = DistictiveQuestionsFeatures()
		for u in dq['filled_pauses']: #filled pauses already tied
			del units[u]

		for u in units:
			if u[0] == '.' and u[-1]== '.': #non-boundary sub-states already tied
				m=max(units[u])
				units[u] = [i for i in units[u] if i<=1 or i>=m-1]
			
		ostr.writeComment('\n%the cluster commands')
		for u in sorted(units):
			for i in sorted(units[u]):
				ostr.writelnString('DT_COMMAND(%s,%d)'%(u,i))
		ostr.writeComment('\n%the save tree commands')
		for u in sorted(units):
			for i in sorted(units[u]):
				ostr.writelnString('SAVE_TREE(%s,%d)'%(u,i))
		ostr.finalize()

class PhoneticContextFeatureGenerator(TriunitFeatureGenerator):
	'''the only difference from base is that we only allow phones as context units.
		contexts are a subset of unitNames, and also if a context has index i in contexts,
		it also must have index i in unitNames. (contexts are followed by non-context units in unitNames).
	'''
	def __init__(self, gm,orderedList=None):
		super(PhoneticContextFeatureGenerator,self).__init__(gm,orderedList)
		self._unitNames=self._contexts
		#only simple phones (names not of the form .X.) may serve as contexts
		self._contexts = [u for u in self._contexts if not (u[0] =='.' and u[-1] =='.')] 
		#was a bare debug 'print' to stdout; routed through logging so -v controls it
		logging.debug('unit names: %s', self._unitNames)
		
	def genLeftContextDT(self):
		'''Build a workspace containing a decision tree that maps
		(prevPhone, unit, phoneTransition) to the left-context phone id.
		@return: the Workspace the DT was created in
		@raise ValueError: if a simple phone's index in unitNames differs from
		its index in contexts (the tree would be built incorrectly)
		'''
		contextToId=dict([(c,i) for (i,c) in enumerate(self._contexts)])
		dtWS=Workspace()

		#build the left tree bottom up
		noTransitionLeaf=TreeBranch(-1,TreeLeaf('{p0}'),'keep the previous value')
		contextEqualsUnitLeaf=TreeBranch(-1,TreeLeaf('{p1}'),'unit is a simple phone, so context equals unit')
		unitToRightPhoneMap=TreeBranch(1,contextEqualsUnitLeaf,'map units to their rightmost phone ')
		unitToLeftPhoneMap={}
		for i,u in enumerate(self._unitNames):
			if u[0]=='.' and u[-1]=='.':
				#complex unit .A_B. : its boundary phones become the contexts
				unit=u[1:-1].split('.',1)[0]
				componentPhones=unit.split('_')

				rightPhone=componentPhones[-1]
				l=TreeLeaf(contextToId[rightPhone])
				unitToRightPhoneMap[i]=TreeBranch(-1,l,'%s ==> %s'%(u,rightPhone))

				leftPhone=componentPhones[0]
				l=TreeLeaf(contextToId[leftPhone])
				unitToLeftPhoneMap[i]=TreeBranch(-1,l,'%s ==> %s'%(u,leftPhone))
			else:
				#assume it's a simple phoneme; its unit index must equal its context index
				if i != contextToId[u]:
					logging.error('unit %s (index %d) is a simple phone but maps to index %d.  Right context tree will not be built correctly.',u,i,contextToId[u])
					raise ValueError()

		lb=TreeBranch(2,unitToRightPhoneMap,'split on phoneTransition')
		lb[0]=noTransitionLeaf 
		#DT is registered in dtWS by its constructor
		lt=DT(dtWS,'prevPhone_phone_phoneTransition_2_prevPhone',3,lb)

		
		return dtWS
	
class DistictiveQuestionsFeatures(dict):
	'''A dict mapping question names (e.g. 'nasal', 'vowel', 'filled_pauses')
	to lists of phone names in this project's phonebet.
	(The class name's 'Distictive' spelling is preserved — callers use it.)
	'''
	def __init__(self):
		"Taken straight from Mark's train.pl script http://www.isle.uiuc.edu/courses/htk/train.pl, with some corrections"
		# Distinctive Feature Definitions for triphone clustering
		# This is fragile; it must be changed whenever the dictionary's
		#     phoneset is changed!!
		# 1. Vowel, Semivowels, and Flap
		#   1a. manner
		m = dict() #mark's questions
		
		m['syllabic_liquid'] = ['er', 'el']
		m['nonsyllabic_liquid'] = ['l', 'r']
		m['liquid'] = m['syllabic_liquid'] + m['nonsyllabic_liquid']
		m['glide'] = ['w', 'y']
		m['semivowel'] = m['liquid'] + m['glide']
		m['semivowel_or_flap'] = m['semivowel'] + ['dx' ]
		m['diphthong'] = [ 'ay', 'oy', 'aw' ]
		m['monophthong'] = ['aa','ao','ah','ow','uh','uw','ae','eh','ey','ih','iy','ax']
		m['vowel'] = m['monophthong'] + m['diphthong']
		m['vowel_or_flap'] = m['vowel'] + ['dx' ]
		m['syllabic_consonant'] = m['syllabic_liquid'] + ['en' ]
		m['syllabic'] = m['syllabic_consonant'] + m['vowel']
		m['vocalic'] = m['semivowel_or_flap'] + m['vowel']
		#   1b. low, high, back, front, tense, lax
		m['lowback_tense'] = [ 'aa', 'ao' ]
		m['lowback'] = [ 'aa', 'ao', 'ah' ]
		m['midback'] = [ 'ow'] 
		m['highback'] = [ 'uw', 'uh' ]
		m['lowfront'] = [ 'ae', 'eh' ]
		m['highfront'] = [ 'ih', 'iy' ]  #fixed
		m['highfront_final'] = m['highfront'] + ['ay', 'oy' ]
		m['lowfront_initial'] = m['lowfront'] + ['aw' ]
		m['highback_final'] = m['highback'] + ['aw' ]
		m['lowback_initial'] = m['lowback'] + ['ay' ]
		m['midback_initial'] = m['midback'] + ['oy' ]
		m['back'] = m['highback'] + ['ow'] + m['lowback']
		m['back_initial'] = m['back'] + m['diphthong']
		m['back_final'] = m['back'] + ['aw' ]
		m['front'] = m['highfront'] + ['ey'] + m['lowfront']
		m['front_final'] = m['front'] + ['ay', 'oy' ]
		m['low'] = m['lowback'] + m['lowfront']
		m['low_initial'] = m['low'] + m['diphthong']
		m['high'] = m['highback'] + m['highfront']
		m['high_final'] = m['high'] + m['diphthong']
		m['lowlax'] = [ 'eh', 'ah' ]
		m['lowtense'] = [ 'aa', 'ao', 'ae' ]
		m['lax'] = [ 'eh', 'ah', 'ih', 'uh' ]
		m['nontense'] = [ 'eh', 'ah', 'ih', 'uh', 'ax' ]
		#   1c. rounded vs. unrounded
		m['rounded_monophthong'] = [ 'ao', 'uw', 'uh', 'ow' ]
		m['rounded_vowel'] = m['rounded_monophthong'] + ['aw' ]
		m['rounded'] = m['rounded_vowel'] + ['w' ]
		m['semirounded'] = m['rounded'] + ['r', 'er' ]
		m['w_final'] = [ 'uw', 'ow', 'aw', 'w' ]
		#  1d. place
		m['retroflex'] = [ 'r', 'er' ]
		m['palatal_tense'] = ['iy', 'ey' ]
		m['palatal_lax'] = ['ih', 'eh' ]
		m['palatal_vowel'] = m['palatal_tense'] + m['palatal_lax']
		m['palatal_vocalic'] = m['palatal_vowel'] + ['y' ]
		m['y_final'] = [ 'y', 'iy', 'ey', 'ay', 'oy' ]
		m['uvular_vowel'] = [ 'ow', 'uh' ]
		m['uvular_syllabic'] = [ 'ow', 'uh', 'el' ]
		m['uvular_vocalic'] = [ 'ow', 'uh', 'el', 'l' ]
		m['uvular_high'] = [ 'uh', 'el', 'l' ]
		m['pharyngeal_tense'] = [ 'aa', 'ao', 'ae' ]
		m['pharyngeal'] = [ 'aa', 'ah', 'ao', 'ae' ]
		m['schwalike'] = [ 'ax', 'ah' ]
		m['central'] = [ 'ax', 'ah', 'uh' ]
		# 2. Consonants, including semivowels: most important division is probably place
		m['labiovelar'] = [ 'f', 'v' ]
		m['bilabial'] = ['m', 'p', 'b']
		m['labial']  = m['labiovelar'] + m['bilabial']
		m['dental'] = ['th', 'dh']
		m['alveolar'] = [ 't','d','n','s','z' ]
		m['palatal'] = ['sh','zh','ch', 'jh', 'y']
		m['palatal_nonaffricate'] = ['sh','zh', 'y']
		m['anterior'] = m['dental'] + m['alveolar']
		m['posterior'] = m['palatal'] + m['retroflex']
		m['distributed'] = m['dental'] + m['palatal']
		m['apical'] = m['alveolar'] + m['retroflex']
		m['coronal'] = m['apical'] + m['distributed']
		m['velar'] = [ 'ng', 'k', 'g' ]
		m['palatovelar'] = m['palatal'] + m['velar']
		m['glottal'] = [ 'q', 'hh' ]
		# 3. Consonants: manner and voicing
		m['vcd_stop'] = [ 'b', 'd', 'g' ]
		m['unv_stop'] = [ 'p', 't', 'k' ]
		m['aspirated'] = m['unv_stop'] + ['hh' ]
		m['stop'] = m['unv_stop'] + m['vcd_stop']
		m['affricate'] = ['jh', 'ch']
		m['stop_or_affricate'] = m['stop'] + m['affricate']
		m['unv_nonstrident'] = [ 'f', 'th' ]
		m['vcd_nonstrident'] = [ 'v', 'dh' ]
		m['unv_stridentf'] = [ 's', 'sh' ]
		m['vcd_stridentf'] = [ 'z', 'zh' ]
		m['unv_strident'] = [ 's', 'sh', 'ch' ]
		m['vcd_strident'] = [ 'z', 'zh', 'jh' ]
		m['vcd_fricative'] = m['vcd_nonstrident'] + m['vcd_stridentf']
		m['unv_fricative'] = m['unv_nonstrident'] + m['unv_stridentf']
		m['stridentf'] = m['vcd_stridentf'] + m['unv_stridentf']
		m['strident'] = m['vcd_strident'] + m['unv_strident']
		m['nonstrident'] = m['unv_nonstrident'] + m['vcd_nonstrident']
		m['fricative'] = m['stridentf'] + m['nonstrident']
		m['nonsyl_nasal'] = [ 'm', 'n', 'ng' ]
		m['nasal'] = [ 'm', 'n', 'ng', 'en' ]
		m['continuant'] = m['fricative'] + m['vocalic']
		m['sonorant'] = m['nasal'] + m['vocalic']
		m['vcd_obstruent'] = m['vcd_stop'] + m['vcd_fricative'] + ['jh' ]
		m['unv_obstruent'] = m['unv_stop'] + m['unv_fricative'] + ['ch' ]
		m['voiced'] = m['vcd_obstruent'] + m['sonorant']

		#translate Mark's lowercase phonebet into ours, dropping duplicates
		self.update(self._remapPhones(m))
		self['filled_pauses'] = [ 'SIL', 'NOISE', 'LAUGH', 'BREATH', 'COUGH', 'LIPSMACK', 'SIGH', 'SNEEZE'] 
		self['is_EOW'] = [ 'EOW'] 
		#also one singleton question per phone
		for p in self._phoneSet:
			self[p] = [ p ] 

		#I don't know if I should include EOW, as it would be context
		
	def _remapPhones(self, m):
		"Converts Mark's questions to use my phonebet - essentially drops some phones, since my phonebet is a subset of Mark's, plus the EOW phoneme"
		#phones mapping to None are dropped from every question set
		phoneRemap={'aa' : 'AA',
					'ae' : 'AE',
					'ah' : 'AH',
					'ao' : 'AO',
					'aw' : 'AW',
					'ax' : None,
					'ay' : 'AY',
					'b'  : 'B',
					'ch' : 'CH',
					'd'  : 'D',
					'dh' : 'DH',
					'dx' : None,
					'eh' : 'EH',
					'el' : None,
					'en' : None,
					'er' : 'ER',
					'ey' : 'EY',
					'f'  : 'F',
					'g'  : 'G',
					'hh' : 'HH',
					'ih' : 'IH',
					'iy' : 'IY',
					'jh' : 'JH',
					'k'  : 'K',
					'l'  : 'L',
					'm'  : 'M',
					'n'  : 'N',
					'ng' : 'NG',
					'ow' : 'OW',
					'oy' : 'OY',
					'p'  : 'P',
					'q'  : None,
					'r'  : 'R',
					's'  : 'S',
					'sh' : 'SH',
					't'  : 'T',
					'th' : 'TH',
					'uh' : 'UH',
					'uw' : 'UW',
					'v'  : 'V',
					'w'  : 'W',
					'y'  : 'Y',
					'z'  : 'Z',
					'zh' : 'ZH',
					}
		remapped = dict()
		inverseRemapped = dict()	#maps the remapped phone tuple back to the first key that produced it
		for k,v in sorted(m.items()):
			if k in remapped:
				logging.debug("not reinserting %s,%s",k,v)
			else:
				newV = sorted([phoneRemap[oldPhone] for oldPhone in v if phoneRemap[oldPhone]])
				#question sets that become identical after remapping are inserted only once
				if tuple(newV) in inverseRemapped:
					logging.debug("not reinserting %s for key %s, since it's a duplicate of of key %s ",newV, k, inverseRemapped[tuple(newV)])
				else:
					remapped[k]=newV
					inverseRemapped[tuple(newV)]=k
		return remapped
				
	@property
	def _phoneSet(self):
		'''Sorted list of all phones appearing in any question set currently in self.'''
		s=set()
		for v in self.values():
			s.update(v)
		return sorted(s)
		
#the parser is used for generating documentation, so create it always, and augment __doc__ with usage info  
#This messes up epydoc a little, but allows us to keep a single version of documentation for all purposes
#NOTE: this runs at import time, so merely importing the module builds the parser as a side effect
parser = makeParser()
__doc__ = __doc__.replace("%InsertOptionParserUsage%\n", parser.format_help())

if __name__ == "__main__":
	main(sys.argv)
	logging.info('Program finished on %s', time.ctime())
